Contents
  1. Deploy Spark
  2. Thrift Server Lake
    2.1. Stop Script
  3. Thrift Server Zeppelin
    3.1. Stop Script
  4. Keepalive
  5. Restart
  6. crontab

Deploy Spark

cd /data/soft
hdfs dfs -get /spark/pkg/spark-3.5.6-bin-hadoop3.tgz && tar -xf spark-3.5.6-bin-hadoop3.tgz && rm spark-3.5.6-bin-hadoop3.tgz && cd spark-3.5.6-bin-hadoop3

hdfs dfs -get /spark/pkg/SparkHiveAuth-1.0-SNAPSHOT.jar jars
hdfs dfs -get /spark/jdbc/mysql-connector-java-5.1.47.jar jars
hdfs dfs -get /spark/jdbc/iceberg-spark-runtime-3.5_2.12-1.6.0.jar jars

cd conf
hdfs dfs -get /spark/pkg/hive-site.xml
hdfs dfs -get /spark/pkg/thriftAuth
hdfs dfs -get /spark/pkg/spark-env.sh
hdfs dfs -get /spark/pkg/spark-defaults.conf

# If the fetched config still references 3.5.5, bump the version string:
#sed -i 's/355/356/' spark-defaults.conf

# spark-daemon.sh: either remove its $SPARK_PID_DIR check,
# point it at a dedicated pid directory,
# or set per-instance environment variables in the startup scripts below
#cd ../sbin
#cp spark-daemon.sh spark-thrift-daemon.sh
#vi spark-thrift-daemon.sh
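
With the distribution unpacked and the configs in place, a quick smoke test confirms the install before starting any thrift server (a minimal sketch, following the paths above):

cd /data/soft/spark-3.5.6-bin-hadoop3
bin/spark-submit --version
# confirm the extra jars landed in jars/
ls jars | grep -E 'iceberg|mysql-connector|SparkHiveAuth'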

Thrift Server Lake

thrift-lake.sh
#!/bin/bash
instance="lake"
export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
# Per-instance pid/log dirs let several thrift servers share one SPARK_HOME
export SPARK_PID_DIR="${SPARK_HOME}/pid-${instance}"
export SPARK_LOG_DIR="${SPARK_HOME}/logs-${instance}"

CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"

exec "${SPARK_HOME}"/sbin/spark-daemon.sh submit $CLASS $instance \
  --name "Thrift ${instance}" \
  --hiveconf hive.server2.thrift.port=10199 \
  --hiveconf hive.default.fileformat=Orc \
  --hiveconf hive.server2.authentication=CUSTOM \
  --master yarn \
  --queue a17 \
  --deploy-mode client \
  --driver-cores 2 \
  --driver-memory 5G \
  --executor-cores 6 \
  --executor-memory 15G \
  --num-executors 3 \
  --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
  --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
  --conf spark.sql.catalog.prod=org.apache.iceberg.spark.SparkSessionCatalog \
  --conf spark.sql.catalog.spark_catalog.type=hive \
  --conf spark.driver.maxResultSize=200MB \
  --conf spark.default.parallelism=500 \
  --conf spark.sql.shuffle.partitions=500 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.scheduler.mode=FAIR \
  --conf spark.network.timeout=6000s \
  --conf spark.memory.fraction=0.8 \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.timeout=180000 \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=3 \
  --conf spark.dynamicAllocation.maxExecutors=50 \
  --jars hdfs:///spark/jdbc/ojdbc6-11.2.0.3.jar,hdfs:///spark/jdbc/clickhouse-jdbc-0.4.6.jar,hdfs:///spark/jdbc/guava-31.0.1-jre.jar,hdfs:///spark/jdbc/iceberg-spark-runtime-3.5_2.12-1.6.0.jar
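
Once the lake instance is up, a beeline connection is a quick functional check. The hostname and the -n/-p credentials below are placeholders, since hive.server2.authentication=CUSTOM delegates login to the SparkHiveAuth jar:

${SPARK_HOME}/bin/beeline -u "jdbc:hive2://localhost:10199" -n <user> -p <password> -e "show databases;"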

Stop Script

thrift-lake-stop.sh
instance="lake"
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
export SPARK_PID_DIR="${SPARK_HOME}/pid-${instance}"
export SPARK_LOG_DIR="${SPARK_HOME}/logs-${instance}"

"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 ${instance}

Thrift Server Zeppelin

thrift-zeppelin.sh
#!/bin/bash
instance="zeppelin"
export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
export SPARK_PID_DIR="${SPARK_HOME}/pid-${instance}"
export SPARK_LOG_DIR="${SPARK_HOME}/logs-${instance}"

CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"

exec "${SPARK_HOME}"/sbin/spark-daemon.sh submit $CLASS $instance \
  --name "Thrift ${instance}" \
  --hiveconf hive.server2.thrift.port=10198 \
  --hiveconf hive.default.fileformat=Orc \
  --hiveconf hive.server2.authentication=CUSTOM \
  --master yarn \
  --queue a17 \
  --deploy-mode client \
  --driver-cores 2 \
  --driver-memory 5G \
  --executor-cores 6 \
  --executor-memory 15G \
  --num-executors 3 \
  --proxy-user zeppelin \
  --conf spark.driver.maxResultSize=200MB \
  --conf spark.default.parallelism=200 \
  --conf spark.sql.shuffle.partitions=200 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.scheduler.mode=FAIR \
  --conf spark.network.timeout=600s \
  --conf spark.memory.fraction=0.8 \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.timeout=18000 \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=3 \
  --conf spark.dynamicAllocation.maxExecutors=40 \
  --jars hdfs:///spark/jdbc/ojdbc6-11.2.0.3.jar,hdfs:///spark/jdbc/clickhouse-jdbc-0.4.6.jar,hdfs:///spark/jdbc/guava-31.0.1-jre.jar
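
Apart from the port and the absence of the Iceberg catalog settings, the notable difference from the lake instance is --proxy-user zeppelin, which only works if Hadoop impersonation is allowed for the submitting user. A quick way to check (the core-site.xml path is an assumption for a typical cluster layout):

grep -B1 -A2 "hadoop.proxyuser" /etc/hadoop/conf/core-site.xml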

Stop Script

cp thrift-lake-stop.sh  thrift-zeppelin-stop.sh
sed -i 's/lake/zeppelin/' thrift-zeppelin-stop.sh
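
A quick diff confirms the generated script differs from the lake one only in the instance name:

diff thrift-lake-stop.sh thrift-zeppelin-stop.sh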

Keepalive

thrift-daemon.sh
#!/bin/bash
EXPECTED_ARGS=2 # expected number of arguments
if [ $# -ne $EXPECTED_ARGS ]; then
  echo "Error: exactly $EXPECTED_ARGS arguments required"
  exit 1
fi

source /etc/profile

CURTIME=$(date +%Y%m%d-%H:%M:%S)

instance=$1
port=$2
name="Thrift ${instance}"

pscount=$(ps -ef | grep "${name}" | grep -v grep | wc -l)
yarncount=$(yarn application -list | grep "${name}" | grep RUNNING | wc -l)
pid=$(ps -ef | grep "${name}" | grep -v grep | awk '{ print $2 }')
isListen=$(netstat -plntu | grep "$port" | wc -l)

# Restart when the port is not listening, the local driver is gone,
# or YARN no longer reports the application as RUNNING
if [[ $isListen -lt 1 || $pscount -eq 0 || $yarncount -eq 0 ]]; then
  /bin/sh /data/soft/spark/sbin/thrift-${instance}-stop.sh
  /bin/kill -9 $pid
  echo "${CURTIME} restart" >> /data/soft/spark/logs-${instance}/daemon_thrift-${instance}.log
  /bin/sh /data/soft/spark/sbin/thrift-${instance}.sh
  exit 0
else
  touch /data/soft/spark/logs-${instance}/daemon_thrift-${instance}.log
fi
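
The script can be exercised by hand before wiring it into cron; the arguments are the instance name and its thrift port:

/bin/sh /data/soft/spark/sbin/thrift-daemon.sh lake 10199
tail /data/soft/spark/logs-lake/daemon_thrift-lake.log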

Restart

thrift-restart.sh
#!/bin/bash
EXPECTED_ARGS=1 # expected number of arguments
if [ $# -ne $EXPECTED_ARGS ]; then
  echo "Error: exactly $EXPECTED_ARGS arguments required"
  exit 1
fi

instance=$1
/bin/sh /data/soft/spark/sbin/thrift-${instance}-stop.sh
/bin/sh /data/soft/spark/sbin/thrift-${instance}.sh
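
Manual usage mirrors the cron entries in the next section:

/bin/sh /data/soft/spark/sbin/thrift-restart.sh zeppelin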

crontab

*/5 * * * * /bin/sh /data/soft/spark/sbin/thrift-daemon.sh lake 10199
*/5 * * * * /bin/sh /data/soft/spark/sbin/thrift-daemon.sh zeppelin 10198
0 8 * * * /bin/sh /data/soft/spark/sbin/thrift-restart.sh zeppelin
0 13 * * * /bin/sh /data/soft/spark/sbin/thrift-restart.sh zeppelin
0 19 * * * /bin/sh /data/soft/spark/sbin/thrift-restart.sh zeppelin
0 21 * * * /bin/sh /data/soft/spark/sbin/thrift-restart.sh zeppelin
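
These lines belong in the crontab of the user that owns /data/soft/spark. One non-interactive way to append an entry (a sketch, run as that user):

(crontab -l 2>/dev/null; echo '*/5 * * * * /bin/sh /data/soft/spark/sbin/thrift-daemon.sh lake 10199') | crontab -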